rm(list=ls())

Chargement des librairies


library(tidyverse)
library(corrplot)
library(lmerTest)
library(ade4)
library(splines)
library(car)
library(plotly)
library(DT)

Prealable avant analyse statistique des donnees


Import des donnees

setwd(".")
load('cham.Rdata')
datatable(cham, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )

Creation des variables age et long et integration des deux variables dans le data frame cham2

age<-cham$year-cham$coh
long<-cham$ydth-cham$coh
cham2<-cbind(cham, age, long)

Question 1

Représentation graphique des données

Représentation par classe d’age

cham_age<-cham2 %>% 
  group_by(age) %>%
  dplyr::summarise(totnaissance= sum(fec), taillepop=n(), fecperind=totnaissance/n()*100)

plot1 <- ggplot(cham_age, aes(x=age, y=fecperind)) + 
    geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") + 
    labs(title = "Fécondité de la population en fonction de l'age",x="Age", y="Fécondité") + 
    theme(plot.title = element_text(hjust = 0.5)) + 
    geom_smooth()
ggplotly(plot1)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Representation sans grouper par classe d’age pour identifier s’il existe un point d’inflexion

#####Representation avec la fonction jitter

p2<-ggplot(cham2, aes(x=age, y=fec)) + geom_jitter(width = 0.55, height = 0)
p2<-p2 + labs(title = "Fécondité en fonction de l'age",x="Age", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5))
p2 + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#####Representation avec la fonction geom_count

p2bis<-ggplot(cham2, aes(x=age, y=fec)) + geom_count()
p2bis<-p2bis + labs(title = "Fécondité en fonction de l'age",x="Age", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5))
p2bis + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Modelisation pour verifier s’il existe un lien entre la fécondité annuelle et l’âge de la femelle

Tests de modèles de régression lineaire generalise avec effets aléatoires


First

glm1 <- glmer(fec ~ age + (1| id),data=cham2, family = binomial)
summary(glm1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1775.1   1790.7   -884.6   1769.1     1325 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6411 -1.1386  0.6819  0.7903  1.0477 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2129   0.4614  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.58024    0.15766   3.680 0.000233 ***
## age         -0.01703    0.01560  -1.092 0.275013    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## age -0.903
Anova(glm1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##      Chisq Df Pr(>Chisq)
## age 1.1916  1      0.275
surdispersion_glm1 <- 1775/1325;surdispersion_glm1
## [1] 1.339623

AIC = 1775 et p-value = 0.27



Pas de surdispersion observee

Second

glm2 <- glmer(fec ~ year + (1| id),data=cham2, family = binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
summary(glm2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1776.3   1791.8   -885.1   1770.3     1325 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6666 -1.1423  0.6818  0.7912  1.0706 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2173   0.4662  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  6.5452438  1.3195532   4.960 7.04e-07 ***
## year        -0.0030515  0.0006584  -4.634 3.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.999
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

Third

glm3 <- glmer(fec ~ age+ year + (1| id),data=cham2, family = binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.292699 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
summary(glm3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + year + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1777.1   1797.9   -884.6   1769.1     1324 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6486 -1.1413  0.6819  0.7907  1.0438 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2128   0.4613  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  2.8329285  1.3214401   2.144   0.0320 *
## age         -0.0167623  0.0155980  -1.075   0.2825  
## year        -0.0011244  0.0006564  -1.713   0.0867 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr) age   
## age  -0.095       
## year -0.993 -0.013
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.292699 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

Four

glm4 <- glmer(fec ~ age*year + (1| id),data=cham2, family = binomial)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 3.403 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
summary(glm4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age * year + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1779.1   1805.1   -884.6   1769.1     1323 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6675 -1.1401  0.6837  0.7906  1.0334 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2095   0.4577  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.082e+01  1.319e+00   8.207 2.26e-16 ***
## age         -8.721e-01  1.936e-02 -45.053  < 2e-16 ***
## year        -5.109e-03  6.551e-04  -7.799 6.23e-15 ***
## age:year     4.265e-04  8.441e-06  50.533  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##          (Intr) age    year  
## age      -0.073              
## year     -0.993 -0.002       
## age:year -0.007 -0.593 -0.014
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 3.403 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

Five

glm5<- glmer(fec ~ age + (1| id) + (1| year),data=cham2, family = binomial)
summary(glm5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + (1 | id) + (1 | year)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1750.5   1771.3   -871.2   1742.5     1324 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9922 -1.0487  0.6251  0.7479  1.4398 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2402   0.4901  
##  year   (Intercept) 0.2214   0.4705  
## Number of obs: 1328, groups:  id, 217; year, 27
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.65014    0.18973   3.427 0.000611 ***
## age         -0.02212    0.01624  -1.362 0.173287    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## age -0.779
Anova(glm5)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##      Chisq Df Pr(>Chisq)
## age 1.8543  1     0.1733
surdispersion_glm5 <- 1750/1324;surdispersion_glm5
## [1] 1.321752

AIC = 1750 et p-value = 0.17



Pas de surdispersion observee

Question 2

Représentation graphique des données

Création tableau de la fécondité moyenne par année

cham_ans = cham2 %>% 
  group_by(year) %>% 
  dplyr::summarise(totnaissance= sum(fec), taillepop=n(), agemoyen=mean(age)) %>% 
  mutate(fecperind=totnaissance/taillepop)%>% 
  mutate(ratiofecage=fecperind/agemoyen)

Représentation graphique sans grouper par annee

p3<-ggplot(cham2, aes(x=year, y=fec)) + geom_jitter(width = 0.55, height = 0);p3
p3bis<-ggplot(cham2, aes(x=year, y=fec)) + geom_count()
p3bis<-p3bis + labs(title = "Fécondité en fonction des annees",x="Anees", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5)); p3bis
p4<-ggplot(data = cham_ans, aes(x = year,y=agemoyen))+geom_point(); p4

Représentation graphique par annee

p5<-ggplot(cham_ans, aes(x=year, y=fecperind)) + geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p5
p5 + geom_smooth() 
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
p6<-ggplot(data = cham_ans, aes(x = year,y=agemoyen))+geom_point(); p6

Modelisation pour verifier s’il existe un lien entre la fécondité annuelle et le temps

Tests de modèles de régression lineaire generalise avec effets aléatoires

Modele GLM fecondite en fonction de l’annee

glm6 <- glmer(fec ~ year + (1| id),data=cham2, family = binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
summary(glm6)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1776.3   1791.8   -885.1   1770.3     1325 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6666 -1.1423  0.6818  0.7912  1.0706 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2173   0.4662  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  6.5452438  1.3195532   4.960 7.04e-07 ***
## year        -0.0030515  0.0006584  -4.634 3.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.999
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

Transformation de la variable year en variable normee reduite

year_scale<-scale(cham2$year, center=TRUE, scale= TRUE)

Test à nouveau des modeles GLM

glm7 <- glmer(fec ~ year_scale + (1| id),data=cham2, family = binomial)
summary(glm7)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year_scale + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1776.3   1791.8   -885.1   1770.3     1325 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6666 -1.1423  0.6818  0.7912  1.0706 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2173   0.4662  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.42505    0.06783   6.266  3.7e-10 ***
## year_scale  -0.01823    0.06530  -0.279     0.78    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## year_scale -0.001
surdispersion_glm7 <- 1776/1325;surdispersion_glm7
## [1] 1.340377

AIC = 1776 et p-value = 0.78



Pas de surdispersion observee

glm8 <- glmer(fec ~ year_scale+age + (1| id),data=cham2, family = binomial)
summary(glm8)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year_scale + age + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1777.1   1797.9   -884.6   1769.1     1324 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6486 -1.1413  0.6819  0.7907  1.0438 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2128   0.4613  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.577851   0.159379   3.626 0.000288 ***
## year_scale  -0.006749   0.066039  -0.102 0.918604    
## age         -0.016762   0.015809  -1.060 0.289001    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) yr_scl
## year_scale  0.146       
## age        -0.905 -0.163
surdispersion_glm8 <- 1777/1324;surdispersion_glm8
## [1] 1.342145
glm9 <- glmer(fec ~ year_scale*age + (1| id),data=cham2, family = binomial)
summary(glm9)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year_scale * age + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1779.1   1805.1   -884.6   1769.1     1323 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6689 -1.1399  0.6839  0.7907  1.0327 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.2092   0.4574  
## Number of obs: 1328, groups:  id, 217
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.575862   0.159548   3.609 0.000307 ***
## year_scale     -0.032348   0.161967  -0.200 0.841699    
## age            -0.016602   0.015815  -1.050 0.293816    
## year_scale:age  0.002755   0.015923   0.173 0.862625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) yr_scl age   
## year_scale   0.124              
## age         -0.906 -0.119       
## year_scal:g -0.072 -0.913  0.059
surdispersion_glm9 <- 1779/1323;surdispersion_glm9
## [1] 1.344671

Question 3

Représentation graphique des données

Création tableau de la fécondité totale par chamois

cham_id = cham2 %>% 
  group_by(id) %>% 
  dplyr::summarise(feconditetotale= sum(fec), long=long, anneetot=(ydth-min(year)+1)) %>%
  unique()%>%
  mutate(fecrelative=feconditetotale/anneetot)%>%
  drop_na(long)
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.

Représentation graphique par id

p7<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p7
p7 + geom_smooth() 
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
p8<-ggplot(data = cham_id, aes(x=long, y=fecrelative))+geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p8
p9<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_count(); p9
p10<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_jitter(width = 0.25, height = 0.25)+ geom_smooth(); p10
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'